library(tidyverse)
## -- Attaching packages -------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ----------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(nycflights13)
## Warning: package 'nycflights13' was built under R version 3.6.3

Manipulando Dados

1. Renomear Variáveis (Rename)

Quais são os nomes das variáveis?

# List the column names of the flights data set.
colnames(flights)
##  [1] "year"           "month"          "day"            "dep_time"      
##  [5] "sched_dep_time" "dep_delay"      "arr_time"       "sched_arr_time"
##  [9] "arr_delay"      "carrier"        "flight"         "tailnum"       
## [13] "origin"         "dest"           "air_time"       "distance"      
## [17] "hour"           "minute"         "time_hour"

Renomear a variável arr_time

# rename() returns a modified copy: without an assignment the result is only
# printed, and `flights` itself keeps its original column names.
flights %>%
  rename(arrival_time = arr_time)
flights

# Assign the result to keep the renamed version.
flights_renomeado <- flights %>%
  rename(arrival_time = arr_time)

flights_renomeado

# Several renames can be chained one after the other...
flights2 <- flights %>%
  rename(arrival_time = arr_time) %>%
  rename(departure_time = dep_time)

# Inspect the tibble that was just created, as the sibling examples do.
flights2

# ...or written as a single rename() call with several new = old pairs.
flights3 <- flights %>%
  rename(
    arrival_time = arr_time,
    departure_time = dep_time
  )

flights3

2. Selecionar Variáveis (Select)

# Keep only the three date columns. The first call just prints the result;
# the second one stores it in `flights4`, which is then printed.
flights %>%
  select(year, month, day)

flights4 <- flights %>%
  select(year, month, day)

flights4

3. Transformar Variáveis (Mutate)

# Add a new column holding twice the departure delay and keep the result.
flights5 <- flights %>%
  mutate(dep_delay_dobro = 2 * dep_delay)

flights5

# New column: half of the difference between arr_time and dep_time.
flights %>%
  mutate(calculo_metade_diferenca = (arr_time - dep_time) / 2)

# Reusing an existing name overwrites that column (dep_delay times 60).
flights %>%
  mutate(dep_delay = 60 * dep_delay)

# Functions can be applied to a column as well: lower-case the origin codes.
flights %>%
  mutate(origin = tolower(origin))

4. Cortar/selecionar Observações (Slice)

# Pick rows by position: a single row, then the first five rows.
flights %>%
  slice(5)

flights %>%
  slice(1:5)

# The positions can also come from a vector defined beforehand.
linhas_desejadas <- c(1, 4, 5, 6, 22, 169)

flights %>%
  slice(linhas_desejadas)

# A negative position drops that row instead of keeping it.
flights %>%
  slice(-1)

flights %>%
  slice(10:20)

# seq() builds evenly spaced positions; its arguments may be named...
flights %>%
  slice(seq(from = 1, to = 100, by = 10))

seq(from = 1, to = 100, by = 10)
##  [1]  1 11 21 31 41 51 61 71 81 91

# ...or passed by position.
flights %>%
  slice(seq(1, 100, 10))
# ?seq

5. Filtrar Observações (Filter)

# Keep only the rows whose month equals 6 and store them in `flights_junho`.
flights_junho <- flights %>%
  filter(month == 6)

flights_junho
# Comparison operators return TRUE or FALSE. Each expression below is followed
# by the output that was recorded when the lesson was rendered.

# Equality and inequality of numbers.
42 == 41 # FALSE
## [1] FALSE
42 != 41 # TRUE
## [1] TRUE
(2 + 2) == (3 + 1) # TRUE
## [1] TRUE
(2 + 2) != (3 + 1) # FALSE 
## [1] FALSE

# Strict and non-strict ordering of numbers.
5 > 3  # TRUE
## [1] TRUE
5 < 3   # FALSE 
## [1] FALSE
42 > 42 # FALSE
## [1] FALSE
42 < 41  # FALSE
## [1] FALSE
42 >= 42 # TRUE
## [1] TRUE
42 <= 41 # FALSE
## [1] FALSE

# Strings: equality is case-sensitive.
"texto" == "texto" # TRUE
## [1] TRUE
"texto" == "texTo" # FALSE
## [1] FALSE
"texto" != "texto" # FALSE
## [1] FALSE

# Strings can be ordered too.
# NOTE(review): string ordering follows the collation rules of the current
# locale, so the results recorded below may differ in other locales (for
# example the "C" locale) - confirm on the target machine.
"a" > "b" # FALSE
## [1] FALSE
"a" < "b" # TRUE
## [1] TRUE
"A" < "b" # TRUE
## [1] TRUE
"A" > "a" # TRUE - Surprise: the uppercase letter is considered greater than the same lowercase letter
## [1] TRUE

# Logical values compare like the numbers 1 (TRUE) and 0 (FALSE).
TRUE == 1 # TRUE 
## [1] TRUE
FALSE == 0 # TRUE
## [1] TRUE
TRUE > FALSE # TRUE
## [1] TRUE

# Comparisons work the same way on values stored in variables.
x <- 5
y <- 10
x > y   #FALSE
## [1] FALSE
# Conditions passed to filter() as separate arguments are combined with AND.
flights %>%
  filter(month == 6, day == 5)

flights %>%
  filter(month == 6, day == 5, dep_time < 1200)

# The OR is grouped first; the delay condition then applies to both branches.
flights %>%
  filter(
    (dep_time <= 500 | dest == "ATL"),
    arr_delay >= 50
  )

# Here the delay condition is tied only to the "ATL" branch: `&` binds
# tighter than `|`, which the explicit parentheses spell out.
flights %>%
  filter(dep_time <= 500 | (dest == "ATL" & arr_delay >= 50))

# `!` negates the whole grouped condition.
flights %>%
  filter(!((dep_time <= 500 | dest == "ATL") & arr_delay >= 50))

Combinando Manipulações

# Chain several verbs in one pipeline: rename a column, keep the rows for
# month 6 / day 5, rescale dep_delay by 60 and keep five columns.
# The filter does not look at dep_delay, so running it before the mutate
# yields the same rows and values.
flights %>%
  rename(arrival_time = arr_time) %>%
  filter(month == 6, day == 5) %>%
  mutate(dep_delay = 60 * dep_delay) %>%
  select(year, month, day, arrival_time, dep_delay)

Exercício 1: Manipulando dados

# Open the exercise file in the RStudio editor. navigateToFile() needs a
# running RStudio session, so check for one first: outside RStudio the script
# prints a hint instead of stopping with an error.
if (rstudioapi::isAvailable()) {
  rstudioapi::navigateToFile("aula_2_exercicios.Rmd")
} else {
  message("Abra o arquivo 'aula_2_exercicios.Rmd' manualmente.")
}

Abrindo Dados

Dados em arquivos textuais (.csv, .tsv, .txt)

# Address of the comma-separated sample file.
file1 <-
  "https://raw.githubusercontent.com/leobarone/ifch_intro_r/master/data/bf_amostra_hv.csv"

# read_csv() assumes a comma as the field separator.
dados <- read_csv(file1)
## Parsed with column specification:
## cols(
##   uf = col_character(),
##   codmunic = col_double(),
##   munic = col_character(),
##   nis = col_double(),
##   valor = col_double()
## )

# read_delim() reads the same file when the separator is given explicitly.
dados <- read_delim(file1, delim = ",")
## Parsed with column specification:
## cols(
##   uf = col_character(),
##   codmunic = col_double(),
##   munic = col_character(),
##   nis = col_double(),
##   valor = col_double()
## )
# Same kind of file, with fields separated by semicolons.
file_semi_colon <-
  "https://raw.githubusercontent.com/leobarone/ifch_intro_r/master/data/bf_amostra_hp.csv"
dados <- read_delim(file_semi_colon, delim = ";")
## Parsed with column specification:
## cols(
##   uf = col_character(),
##   codmunic = col_double(),
##   munic = col_character(),
##   nis = col_double(),
##   valor = col_double()
## )

# And with fields separated by tab characters ("\t").
file_tab <-
  "https://raw.githubusercontent.com/leobarone/ifch_intro_r/master/data/bf_amostra_ht.csv"
dados <- read_delim(file_tab, delim = "\t")
## Parsed with column specification:
## cols(
##   uf = col_character(),
##   codmunic = col_double(),
##   munic = col_character(),
##   nis = col_double(),
##   valor = col_double()
## )
# Sample file whose first line is data, not column names.
file_sem_header <-
  "https://raw.githubusercontent.com/leobarone/ifch_intro_r/master/data/bf_amostra_nv.csv"

# col_names = FALSE tells readr not to treat the first line as a header; the
# columns then get the generated names X1..X5 (see the recorded output).
# FALSE is spelled out because the shortcut `F` is an ordinary variable that
# can be reassigned.
dados <- read_delim(
  file_sem_header,
  col_names = FALSE,
  delim = ","
)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_double(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double()
## )

# Alternatively, supply the column names directly as a character vector.
dados <- read_delim(
  file_sem_header,
  col_names = c(
    "estado",
    "municipio_cod",
    "municipio_nome",
    "NIS",
    "transferido"
  ),
  delim = ","
)
## Parsed with column specification:
## cols(
##   estado = col_character(),
##   municipio_cod = col_double(),
##   municipio_nome = col_character(),
##   NIS = col_double(),
##   transferido = col_double()
## )
# Column types given as a compact string, one letter per column
# (c = character, i = integer, d = double).
dados <- read_delim(
  file1,
  delim = ",",
  col_types = "cicid"
)


# Without col_types readr guesses the types and reports its choice.
dados <- read_csv(file1)
## Parsed with column specification:
## cols(
##   uf = col_character(),
##   codmunic = col_double(),
##   munic = col_character(),
##   nis = col_double(),
##   valor = col_double()
## )

# locale() sets the number format: comma as decimal mark, dot as grouping mark.
dados <- read_delim(
  file1,
  delim = ",",
  locale = locale(decimal_mark = ",", grouping_mark = ".")
)
## Parsed with column specification:
## cols(
##   uf = col_character(),
##   codmunic = col_double(),
##   munic = col_character(),
##   nis = col_double(),
##   valor = col_double()
## )

# locale() also sets the text encoding used to read the file.
dados <- read_delim(
  file1,
  delim = ",",
  locale = locale(encoding = "latin1")
)
## Parsed with column specification:
## cols(
##   uf = col_character(),
##   codmunic = col_double(),
##   munic = col_character(),
##   nis = col_double(),
##   valor = col_double()
## )

Dados em arquivos de Excel

library("readxl")
library(readxl)
# Download the zipped workbook, extract it and read one of its sheets.
url <- "ftp://ftp.ibge.gov.br/Perfil_Municipios/2005/base_MUNIC_2005.zip"
destfile <- "dados/base_MUNIC_2005.zip"

# Make sure the target folder exists before downloading into it; the call is
# silent when the folder is already there.
dir.create(dirname(destfile), showWarnings = FALSE, recursive = TRUE)
curl::curl_download(url, destfile)

# unzip() extracts into the working directory by default, but the workbook is
# read from the "dados" folder below, so extract next to the archive.
# NOTE(review): assumes the archive holds "Base 2005.xls" at its top level -
# confirm against the downloaded file.
utils::unzip(destfile, exdir = dirname(destfile))

# List the sheets available in the workbook.
excel_sheets("dados/Base 2005.xls")
##  [1] "Dicionário"           "Informações prefeito" "Adm Direta"          
##  [4] "Adm Indireta"         "Leg e inst planej"    "Recursos gestão"     
##  [7] "Articulações inter"   "Habitação"            "Transporte"          
## [10] "Cultura"              "Variáveis externas"

# A sheet can be selected by its name...
externas <- read_excel("dados/Base 2005.xls", sheet = "Variáveis externas")

# ...or by its position (the 11th sheet in the list above).
externas <- read_excel("dados/Base 2005.xls", sheet = 11)

head(externas)

Dados de SPSS, Stata e SAS

library("haven")

# Read the SPSS (.sav) copy of the 2015 Latinobarometro file with haven.
latino_barometro_spss <- read_spss(
  "dados/F00004529-Latinobarometro_2015_sav/Latinobarometro_2015_Eng.sav"
)

# Read the Stata (.dta) copy of the same file.
latino_barometro_stata <- read_stata(
  "dados/F00004530-Latinobarometro_2015_dta/Latinobarometro_2015_Eng.dta"
)